import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
# Hex colour palette; the bar charts below draw their marker colours from it
# at random. Entries (including the repeated '#C2b280') are kept in their
# original order.
colorarr = [
    '#0592D0',
    '#Cd7f32',
    '#E97451',
    '#Bdb76b',
    '#954535',
    '#C2b280',
    '#808000',
    '#C2b280',
    '#E4d008',
    '#9acd32',
    '#Eedc82',
    '#E4d96f',
    '#32cd32',
    '#39ff14',
    '#00ff7f',
    '#008080',
    '#36454f',
    '#F88379',
    '#Ff4500',
    '#Ffb347',
    '#A94064',
    '#E75480',
    '#Ffb6c1',
    '#E5e4e2',
    '#Faf0e6',
    '#8c92ac',
    '#Dbd7d2',
    '#A7a6ba',
    '#B38b6d',
]
# Load the training data from a CSV file (path is relative to the current
# working directory) and preview its first rows.
cropdf = pd.read_csv("Research/datatrain2.csv")
cropdf.head()
| N | P | K | Temperature | Humidity | PH | Breed | |
|---|---|---|---|---|---|---|---|
| 0 | 121.6 | 28.6 | 58 | 29.82046 | 57.48837 | 5.99954 | AT 356 |
| 1 | 133.7 | 28.8 | 58 | 26.92841 | 54.05204 | 5.98766 | AT 356 |
| 2 | 134.6 | 39.9 | 56 | 25.78214 | 65.78586 | 5.96768 | AT 356 |
| 3 | 122.3 | 38.4 | 54 | 29.64071 | 60.17039 | 6.13613 | AT 356 |
| 4 | 137.5 | 27.7 | 55 | 28.16842 | 67.62975 | 5.54322 | AT 356 |
# Dataset dimensions as a (rows, columns) tuple.
cropdf.shape
(2118, 7)
# Column labels of the dataset.
cropdf.columns
Index(['N', 'P', 'K', 'Temperature', 'Humidity', 'PH', 'Breed'], dtype='object')
# Per-column flag: True when the column contains at least one missing value.
cropdf.isnull().any()
N False P False K False Temperature False Humidity False PH False Breed False dtype: bool
# Distinct values of the 'Breed' column: how many there are, and which ones.
breed_names = cropdf['Breed'].unique()
print("Number of various crops: ", len(breed_names))
print("List of crops: ", breed_names)
Number of various crops: 5 List of crops: ['AT 356' 'BG 250' 'BG 352' 'Mottaikaruppan' 'suwandel']
# Number of rows recorded for each breed.
cropdf['Breed'].value_counts()
BG 250 528 suwandel 464 BG 352 396 Mottaikaruppan 390 AT 356 340 Name: Breed, dtype: int64
# Per-breed average of every remaining column; the result is indexed by breed.
crop_summary = cropdf.pivot_table(index=['Breed'], aggfunc='mean')
crop_summary.head()
| Humidity | K | N | P | PH | Temperature | |
|---|---|---|---|---|---|---|
| Breed | ||||||
| AT 356 | 68.582285 | 71.205882 | 136.739412 | 32.991765 | 6.013526 | 27.586198 |
| BG 250 | 67.539950 | 69.685606 | 130.518182 | 33.192424 | 6.065707 | 27.965815 |
| BG 352 | 70.298554 | 68.323232 | 126.328283 | 33.571717 | 6.022401 | 27.184925 |
| Mottaikaruppan | 67.730658 | 63.800000 | 132.569744 | 30.894359 | 6.098946 | 27.823929 |
| suwandel | 69.459796 | 68.517241 | 129.702586 | 33.967241 | 6.007524 | 27.902968 |
# Nitrogen (N): rank breeds by their mean N value and show the highest and
# lowest groups side by side as horizontal bar charts.
crop_summary_N = crop_summary.sort_values(by='N', ascending=False)

# Exactly two panels are drawn (most / least). Requesting a third column
# would only leave an empty subplot next to them.
fig = make_subplots(rows=1, cols=2)

# Up to ten breeds with the highest mean N, re-sorted ascending for display.
highest_n = crop_summary_N['N'].head(10).sort_values()
top = {
    'y': highest_n.index,
    'x': highest_n,
}

# Up to ten breeds with the lowest mean N, kept in descending order.
# NOTE(review): the summary shown earlier lists only 5 breeds, in which case
# both panels contain every breed - confirm this is intended.
lowest_n = crop_summary_N['N'].tail(10)
last = {
    'y': lowest_n.index,
    'x': lowest_n,
}

fig.add_trace(
    go.Bar(top,
           name="Most nitrogen required",
           marker_color=random.choice(colorarr),  # colour picked at random per run
           orientation='h',
           text=top['x']),
    row=1, col=1
)
fig.add_trace(
    go.Bar(last,
           name="Least nitrogen required",
           marker_color=random.choice(colorarr),
           orientation='h',
           text=last['x']),
    row=1, col=2
)

# Print each bar's value inside the bar.
fig.update_traces(texttemplate='%{text}', textposition='inside')
fig.update_layout(title_text="Nitrogen (N)",
                  plot_bgcolor='white',
                  font_size=12,
                  font_color='black',
                  height=500)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
# Phosphorus (P): breeds ordered by mean P, plotted as two horizontal bar
# panels (highest group on the left, lowest group on the right).
crop_summary_P = crop_summary.sort_values(by='P', ascending=False)

p_ranked = crop_summary_P['P']
p_highest = p_ranked.iloc[:10].sort_values()   # first ten, re-sorted ascending
p_lowest = p_ranked.iloc[-10:]                 # last ten, order unchanged
top = dict(y=p_highest.index, x=p_highest)
last = dict(y=p_lowest.index, x=p_lowest)

fig = make_subplots(rows=1, cols=2)
panels = [
    (top, "Most phosphorus required"),
    (last, "Least phosphorus required"),
]
for panel_col, (bar_data, legend_name) in enumerate(panels, start=1):
    fig.add_trace(
        go.Bar(bar_data,
               name=legend_name,
               marker_color=random.choice(colorarr),
               orientation='h',
               text=bar_data['x']),
        row=1, col=panel_col,
    )

# Show each bar's value inside the bar, then apply the shared look.
fig.update_traces(texttemplate='%{text}', textposition='inside')
fig.update_layout(
    title_text="Phosphorus (P)",
    plot_bgcolor='white',
    font_size=12,
    font_color='black',
    height=500,
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
# Potassium (K): breeds ordered by mean K, drawn as a "most" panel and a
# "least" panel of horizontal bars.
crop_summary_K = crop_summary.sort_values(by='K', ascending=False)
fig = make_subplots(rows=1, cols=2)

k_ranked = crop_summary_K['K']
k_highest = k_ranked[:10].sort_values()   # first ten, re-sorted ascending
k_lowest = k_ranked[-10:]                 # last ten, order unchanged
top = {'y': k_highest.index, 'x': k_highest}
last = {'y': k_lowest.index, 'x': k_lowest}

# Build both traces first (random colours are drawn in the same order as the
# panels appear), then place them.
most_trace = go.Bar(
    top,
    name="Most potassium required",
    marker_color=random.choice(colorarr),
    orientation='h',
    text=top['x'],
)
least_trace = go.Bar(
    last,
    name="Least potassium required",
    marker_color=random.choice(colorarr),
    orientation='h',
    text=last['x'],
)
fig.add_trace(most_trace, row=1, col=1)
fig.add_trace(least_trace, row=1, col=2)

fig.update_traces(texttemplate='%{text}', textposition='inside')
layout_options = dict(
    title_text="Potassium (K)",
    plot_bgcolor='white',
    font_size=12,
    font_color='black',
    height=500,
)
fig.update_layout(**layout_options)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
# Grouped bar chart: one bar group per breed, one bar each for the mean
# N, P and K values.
fig = go.Figure()
nutrient_traces = (
    # (summary column, legend name, bar colour)
    ('N', 'Nitrogen', 'indianred'),
    ('P', 'Phosphorous', 'lightsalmon'),
    ('K', 'Potash', 'crimson'),
)
for nutrient_col, legend_name, bar_color in nutrient_traces:
    fig.add_trace(go.Bar(
        x=crop_summary.index,
        y=crop_summary[nutrient_col],
        name=legend_name,
        marker_color=bar_color,
    ))

fig.update_layout(
    title="N, P, K values comparision between crops",
    plot_bgcolor='white',
    barmode='group',
    xaxis_tickangle=-45,
)
fig.show()
# Donut charts of the mean N : P : K split, one chart per breed.
labels = ['Nitrogen(N)', 'Phosphorous(P)', 'Potash(K)']
pie_breeds = ['AT 356', 'BG 250', 'BG 352', 'Mottaikaruppan', 'suwandel']
# Horizontal paper position of each breed caption (one per pie, left to right).
caption_x = [0.06, 0.26, 0.50, 0.74, 0.94]

# 'domain' subplots are required for pie traces; build one spec dict per pie
# rather than repeating a shared dict object.
fig = make_subplots(
    rows=1,
    cols=len(pie_breeds),
    specs=[[{'type': 'domain'} for _ in pie_breeds]],
)

for pie_col, breed in enumerate(pie_breeds, start=1):
    # Label-based lookup of the breed's row. This replaces positional
    # `series[0]` access on a string-indexed Series, which pandas 2.x
    # deprecates.
    values = crop_summary.loc[breed, ['N', 'P', 'K']].tolist()
    fig.add_trace(go.Pie(labels=labels, values=values, name=breed), 1, pie_col)

fig.update_traces(hole=.4, hoverinfo="label+percent+name")
fig.update_layout(
    title_text="NPK ratio for " + ", ".join(pie_breeds),
    annotations=[
        dict(text=breed, x=x_pos, y=0.8, font_size=15, showarrow=False)
        for breed, x_pos in zip(pie_breeds, caption_x)
    ],
)
fig.show()
# Temperature vs. humidity scatter, restricted to the listed breeds and
# coloured / marked by breed.
scatter_breeds = ['AT 356', 'BG 352', 'BG 250', 'Mottaikaruppan', 'suwandel']
crop_scatter = cropdf[cropdf['Breed'].isin(scatter_breeds)]

fig = px.scatter(
    crop_scatter,
    x="Temperature",
    y="Humidity",
    color="Breed",
    symbol="Breed",
)
fig.update_layout(plot_bgcolor='white')
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
# Bar chart of the per-breed mean pH, humidity and temperature.
climate_columns = ["PH", "Humidity", "Temperature"]
fig = px.bar(crop_summary, x=crop_summary.index, y=climate_columns)
fig.update_layout(
    title_text="Comparision between ph, temperature and humidity",
    plot_bgcolor='white',
    height=500,
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
# Heatmap of the pairwise correlations between the numeric feature columns.
fig, ax = plt.subplots(1, 1, figsize=(15, 9))

# 'Breed' holds strings. Calling DataFrame.corr() on the whole frame raises
# ValueError on pandas >= 2.0 (older versions silently dropped the column),
# so restrict the frame to numeric columns explicitly.
numeric_features = cropdf.select_dtypes(include='number')
sns.heatmap(numeric_features.corr(), annot=True, cmap='Wistia', ax=ax)

ax.set(xlabel='features')
ax.set(ylabel='features')
plt.title('Correlation between different features', fontsize = 15, c='black')
plt.show()
# Split the data into features (every column except 'Breed') and the target
# ('Breed'), then hold out 30% of the rows for testing. The fixed random_state
# makes the shuffled split reproducible.
from sklearn.model_selection import train_test_split

y = cropdf['Breed']
X = cropdf.drop(columns='Breed')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=0,
)
# Notebook-style command that installs the lightgbm package with pip.
# NOTE(review): this line is not valid Python if the file is executed as a
# plain script - confirm it is only ever run as a notebook cell.
pip install lightgbm
Requirement already satisfied: lightgbm in c:\users\kareeshan\anaconda3\lib\site-packages (4.0.0) Requirement already satisfied: scipy in c:\users\kareeshan\anaconda3\lib\site-packages (from lightgbm) (1.7.3) Requirement already satisfied: numpy in c:\users\kareeshan\anaconda3\lib\site-packages (from lightgbm) (1.21.5) Note: you may need to restart the kernel to use updated packages.
# Build the LightGBM model.
import lightgbm as lgb
# No arguments are passed, so the classifier uses the library's default settings.
model = lgb.LGBMClassifier()
# Train on the training split only; the test split is kept for evaluation.
model.fit(X_train, y_train)
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000094 seconds. You can set `force_col_wise=true` to remove the overhead. [LightGBM] [Info] Total Bins 655 [LightGBM] [Info] Number of data points in the train set: 1482, number of used features: 6 [LightGBM] [Info] Start training from score -1.850109 [LightGBM] [Info] Start training from score -1.368903 [LightGBM] [Info] Start training from score -1.659241 [LightGBM] [Info] Start training from score -1.732803 [LightGBM] [Info] Start training from score -1.508134
LGBMClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LGBMClassifier()
# Predict breeds for the held-out test rows.
y_pred = model.predict(X_test)

# Fraction of test rows predicted correctly. scikit-learn's signature is
# accuracy_score(y_true, y_pred); compute it once in that order and reuse the
# value instead of storing one result and recomputing another for the print.
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print('LightGBM Model accuracy score: {0:0.4f}'.format(accuracy))
LightGBM Model accuracy score: 0.8208
# Accuracy on the rows the model was fitted on, for comparison with the
# test-set accuracy.
y_pred_train = model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
print('Training-set accuracy score: {0:0.4f}'.format(train_accuracy))
Training-set accuracy score: 1.0000
# Report the estimator's own score() on the training and test splits.
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print('Training set score: {:.4f}'.format(train_score))
print('Test set score: {:.4f}'.format(test_score))
Training set score: 1.0000 Test set score: 0.8208
# Confusion matrix of the test-set predictions, one row / column per breed
# (rows: actual breed, columns: predicted breed).
from sklearn.metrics import confusion_matrix

# Fix the class order explicitly so the matrix rows/columns and the tick
# labels drawn on the heatmap are guaranteed to line up.
class_labels = list(model.classes_)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

plt.figure(figsize=(15,15))
sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square = True, cmap = 'Blues',
            xticklabels=class_labels, yticklabels=class_labels)
plt.ylabel('Actual label')
plt.xlabel('Predicted label')
# accuracy_score is expected to have been imported by the earlier accuracy cell.
all_sample_title = 'Confusion Matrix - score:'+str(accuracy_score(y_test,y_pred))
plt.title(all_sample_title, size = 15)
plt.show()
# Print per-breed precision, recall, F1-score and support for the test-set
# predictions, followed by the overall averages.
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))
precision recall f1-score support
AT 356 0.93 0.74 0.82 107
BG 250 0.78 0.83 0.80 151
BG 352 0.82 0.89 0.86 114
Mottaikaruppan 0.86 0.88 0.87 128
suwandel 0.76 0.76 0.76 136
accuracy 0.82 636
macro avg 0.83 0.82 0.82 636
weighted avg 0.83 0.82 0.82 636
# Predict the breed for one hand-written sample. The values are given by
# feature name and then ordered by the training columns, so each number is
# tied to its feature instead of relying on an unlabeled positional list.
sample = pd.DataFrame([{
    'N': 90,
    'P': 42,
    'K': 43,
    'Temperature': 20.879744,
    'Humidity': 75,
    'PH': 5.5,
}])[list(X.columns)]
newdata = model.predict(sample)
newdata
array(['BG 352'], dtype=object)
# Notebook-style command that installs the joblib package with pip.
# NOTE(review): this line is not valid Python if the file is executed as a
# plain script - confirm it is only ever run as a notebook cell.
pip install joblib
Requirement already satisfied: joblib in c:\users\kareeshan\anaconda3\lib\site-packages (1.3.1) Note: you may need to restart the kernel to use updated packages.
# Train a LightGBM classifier with default settings on the training split and
# persist it to 'train_model2.joblib' in the current working directory.
# (An earlier commented-out copy of this cell was removed: it duplicated the
# code below and called `joblib.dump` although only `dump` was imported.)
from lightgbm import LGBMClassifier
from joblib import dump

lgbm_classification = LGBMClassifier()
lgbm_classification.fit(X_train, y_train)
# dump() returns the list of file names it wrote; as the last expression it is
# what the notebook cell displays.
dump(lgbm_classification, 'train_model2.joblib')
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000284 seconds. You can set `force_col_wise=true` to remove the overhead. [LightGBM] [Info] Total Bins 655 [LightGBM] [Info] Number of data points in the train set: 1482, number of used features: 6 [LightGBM] [Info] Start training from score -1.850109 [LightGBM] [Info] Start training from score -1.368903 [LightGBM] [Info] Start training from score -1.659241 [LightGBM] [Info] Start training from score -1.732803 [LightGBM] [Info] Start training from score -1.508134
['train_model2.joblib']